static int l1, l2, oos_count, page_count;
#define FILE_AND_LINE 0
-//#define MFN_TO_WATCH 0x4700
+//#define MFN2_TO_WATCH 0x1d8
#if FILE_AND_LINE
#define adjust(_p, _a) _adjust((_p), (_a), __FILE__, __LINE__)
void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS)
{
-#ifdef MFN_TO_WATCH
- if (page_to_pfn(page) == MFN_TO_WATCH)
+#ifdef MFN2_TO_WATCH
+ if (page_to_pfn(page) == MFN2_TO_WATCH)
{
- APRINTK("adjust(mfn=%p, dir=%d, adjtype=%d) MFN_TO_WATCH",
+ APRINTK("adjust(mfn=%p, dir=%d, adjtype=%d)",
page_to_pfn(page), dir, adjtype);
}
#endif
adjust_shadow_tables();
}
+ //printk("d->shared_info=%p __pa()=%p\n", d->shared_info, __pa(d->shared_info));
+ adjust(virt_to_page(d->shared_info), 1);
+
return errors;
}
d->shared_info = (void *)alloc_xenheap_page();
memset(d->shared_info, 0, PAGE_SIZE);
ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];
- SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
+ SHARE_PFN_WITH_DOMAIN2(virt_to_page(d->shared_info), d);
machine_to_phys_mapping[virt_to_phys(d->shared_info) >>
PAGE_SHIFT] = INVALID_M2P_ENTRY;
#include <xen/spinlock.h>
#include <asm/uaccess.h>
+#ifdef PERF_COUNTERS
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#endif
+
extern struct exception_table_entry __start___ex_table[];
extern struct exception_table_entry __stop___ex_table[];
extern struct exception_table_entry __start___pre_ex_table[];
unsigned long fixup = search_one_table(
__start___pre_ex_table, __stop___pre_ex_table-1, addr);
DPRINTK("Pre-exception: %p -> %p\n", addr, fixup);
+#ifdef PERF_COUNTERS
+ if ( fixup )
+ perfc_incrc(exception_fixed);
+#endif
return fixup;
}
gpfn = l1_pgentry_to_pfn(mk_l1_pgentry(l1e));
gmfn = __gpfn_to_mfn(d, gpfn);
- if ( unlikely(!gmfn) )
+ if ( unlikely(!VALID_MFN(gmfn)) )
return 0;
if ( unlikely(shadow_mode_enabled(d)) )
}
-void put_page_type(struct pfn_info *page)
+void _put_page_type(struct pfn_info *page)
{
u32 nx, x, y = page->u.inuse.type_info;
}
-int get_page_type(struct pfn_info *page, u32 type)
+int _get_page_type(struct pfn_info *page, u32 type)
{
u32 nx, x, y = page->u.inuse.type_info;
unsigned int cmd = val & MMUEXT_CMD_MASK, type;
struct exec_domain *ed = current;
struct domain *d = ed->domain, *e;
- unsigned long gpfn = ptr >> PAGE_SHIFT;
- unsigned long mfn = __gpfn_to_mfn(d, gpfn);
+ unsigned long mfn = ptr >> PAGE_SHIFT;
struct pfn_info *page = &frame_table[mfn];
u32 x, y, _d, _nd;
domid_t domid;
type = PGT_l1_page_table | PGT_va_mutable;
pin_page:
- if ( unlikely(percpu_info[cpu].foreign &&
- (shadow_mode_translate(d) ||
- shadow_mode_translate(percpu_info[cpu].foreign))) )
- {
- // oops -- we should be using the foreign domain's P2M
- mfn = __gpfn_to_mfn(FOREIGNDOM, gpfn);
- page = &frame_table[mfn];
- }
-
if ( shadow_mode_enabled(FOREIGNDOM) )
type = PGT_writable_page;
#endif /* __x86_64__ */
case MMUEXT_UNPIN_TABLE:
- if ( unlikely(percpu_info[cpu].foreign &&
- (shadow_mode_translate(d) ||
- shadow_mode_translate(percpu_info[cpu].foreign))) )
- {
- // oops -- we should be using the foreign domain's P2M
- mfn = __gpfn_to_mfn(FOREIGNDOM, gpfn);
- page = &frame_table[mfn];
- }
-
if ( unlikely(!(okay = get_page_from_pagenr(mfn, FOREIGNDOM))) )
{
MEM_LOG("mfn %p bad domain (dom=%p)",
cleanup_writable_pagetable(d);
if ( unlikely(shadow_mode_enabled(d)) )
- {
check_pagetable(ed, "pre-mmu"); /* debug */
- }
/*
* If we are resuming after preemption, read how much work we have already
}
cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
- gpfn = req.ptr >> PAGE_SHIFT;
- mfn = __gpfn_to_mfn(d, gpfn);
+ mfn = req.ptr >> PAGE_SHIFT;
okay = 0;
if ( shadow_mode_log_dirty(d) )
__mark_dirty(d, mfn);
+ gpfn = __mfn_to_gpfn(d, mfn);
+ ASSERT(gpfn);
if ( page_is_page_table(page) )
shadow_mark_mfn_out_of_sync(ed, gpfn, mfn);
}
break;
case MMU_MACHPHYS_UPDATE:
+
+ // HACK ALERT... Think about this later...
+ //
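+ // For a privileged domain driving a translate-mode foreign domain, the
+ // machphys update is reinterpreted: the foreign domain is handed its next
+ // free I/O pfn and both directions of the mapping are installed, M2P via
+ // set_machinetophys() and P2M via set_p2m_entry().  Note the BUG() below
+ // keeps the normal path disabled for now.
+ //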
+ if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) )
+ {
+ rc = FOREIGNDOM->next_io_page++;
+ printk("privileged guest dom%d requests mfn=%p for dom%d, gets pfn=%p\n",
+ d->id, mfn, FOREIGNDOM->id, rc);
+ set_machinetophys(mfn, rc);
+ set_p2m_entry(FOREIGNDOM, rc, mfn);
+ okay = 1;
+ break;
+ }
+ BUG();
+
if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
{
MEM_LOG("Could not get page for mach->phys update");
LOCK_BIGLOCK(dom);
- if ( !(mfn = __gpfn_to_mfn(dom, gpfn)) ) {
+ if ( !VALID_MFN(mfn = __gpfn_to_mfn(dom, gpfn)) ) {
UNLOCK_BIGLOCK(dom);
return -EINVAL;
}
unsigned long smfn;
int pin = 0;
- if ( (psh_type != PGT_snapshot) &&
- !shadow_promote(d, gpfn, gmfn, psh_type) )
- {
- FSH_LOG("promotion of pfn=%p mfn=%p failed! external gnttab refs?\n",
- gpfn, gmfn);
- return 0;
- }
-
page = alloc_domheap_page(NULL);
if ( unlikely(page == NULL) )
{
switch ( psh_type )
{
case PGT_l1_shadow:
+ if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
+ goto oom;
perfc_incr(shadow_l1_pages);
d->arch.shadow_page_count++;
break;
case PGT_l2_shadow:
+ if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
+ goto oom;
perfc_incr(shadow_l2_pages);
d->arch.shadow_page_count++;
if ( PGT_l2_page_table == PGT_root_page_table )
break;
case PGT_hl2_shadow:
+ // Treat an hl2 as an L1 for purposes of promotion.
+ // For external mode domains, treat them as an L2 for purposes of
+ // pinning.
+ //
+ if ( !shadow_promote(d, gpfn, gmfn, PGT_l1_shadow) )
+ goto oom;
perfc_incr(hl2_table_pages);
d->arch.hl2_page_count++;
-
- // treat an hl2 as an L1 for purposes of promotion,
- // and as an L2 for purposes of pinning.
- //
- if ( PGT_l2_page_table == PGT_root_page_table )
+ if ( shadow_mode_external(d) &&
+ (PGT_l2_page_table == PGT_root_page_table) )
pin = 1;
break;
shadow_pin(smfn);
return smfn;
+
+ oom:
+ FSH_LOG("promotion of pfn=%p mfn=%p failed! external gnttab refs?\n",
+ gpfn, gmfn);
+ free_domheap_page(page);
+ return 0;
}
static void inline
static void inline
free_shadow_hl2_table(struct domain *d, unsigned long smfn)
{
- printk("free_shadow_hl2_table(smfn=%p)\n", smfn);
-
- l1_pgentry_t *pl1e = map_domain_mem(smfn << PAGE_SHIFT);
+ l1_pgentry_t *hl2 = map_domain_mem(smfn << PAGE_SHIFT);
int i, limit;
if ( shadow_mode_external(d) )
limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
for ( i = 0; i < limit; i++ )
- put_page_from_l1e(pl1e[i], d);
+ {
+ unsigned long hl2e = l1_pgentry_val(hl2[i]);
+ if ( hl2e & _PAGE_PRESENT )
+ put_page(pfn_to_page(hl2e >> PAGE_SHIFT));
+ }
- unmap_domain_mem(pl1e);
+ unmap_domain_mem(hl2);
}
static void inline
ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
- delete_shadow_status(d, gpfn, type);
+ delete_shadow_status(d, gpfn, gmfn, type);
switch ( type )
{
ed->arch.monitor_vtable = 0;
}
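+/*
+ * Install a single pfn -> mfn translation in a translate-mode domain's
+ * phys_table (its P2M map).  The backing L1 page for the relevant slot is
+ * allocated on demand from the domheap.  Returns 1 on success, 0 if that
+ * allocation fails.
+ */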
+int
+set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn)
+{
+ unsigned long phystab = pagetable_val(d->arch.phys_table);
+ l2_pgentry_t *l2, l2e;
+ l1_pgentry_t *l1;
+ struct pfn_info *l1page;
+ unsigned long va = pfn << PAGE_SHIFT;
+
+ ASSERT( phystab );
+
+#ifdef WATCH_MAP_DOMAIN_CALLERS
+ int old_map_domain_mem_noisy = map_domain_mem_noisy;
+ map_domain_mem_noisy = 0;
+#endif
+
+ l2 = map_domain_mem(phystab);
+ if ( !l2_pgentry_val(l2e = l2[l2_table_offset(va)]) )
+ {
+ l1page = alloc_domheap_page(NULL);
+ if ( !l1page )
+ {
+ unmap_domain_mem(l2);
+ return 0;
+ }
+
+ l1 = map_domain_mem(page_to_pfn(l1page) << PAGE_SHIFT);
+ memset(l1, 0, PAGE_SIZE);
+ unmap_domain_mem(l1);
+
+ l2e = l2[l2_table_offset(va)] =
+ mk_l2_pgentry((page_to_pfn(l1page) << PAGE_SHIFT) |
+ __PAGE_HYPERVISOR);
+ }
+ unmap_domain_mem(l2);
+
+ l1 = map_domain_mem(l2_pgentry_val(l2e) & PAGE_MASK);
+ l1[l1_table_offset(va)] = mk_l1_pgentry((mfn << PAGE_SHIFT) |
+ __PAGE_HYPERVISOR);
+ unmap_domain_mem(l1);
+
+#ifdef WATCH_MAP_DOMAIN_CALLERS
+ map_domain_mem_noisy = old_map_domain_mem_noisy;
+#endif
+
+ return 1;
+}
+
static int
alloc_p2m_table(struct domain *d)
{
struct list_head *list_ent;
- struct pfn_info *page, *l2page, *l1page;
- l2_pgentry_t *l2, l2e, last_l2e = mk_l2_pgentry(0);
- l1_pgentry_t *l1 = NULL;
- unsigned long va, mfn, pfn;
+ struct pfn_info *page, *l2page;
+ l2_pgentry_t *l2;
+ unsigned long mfn, pfn;
l2page = alloc_domheap_page(NULL);
if ( !l2page )
d->arch.phys_table = mk_pagetable(page_to_pfn(l2page) << PAGE_SHIFT);
l2 = map_domain_mem(page_to_pfn(l2page) << PAGE_SHIFT);
memset(l2, 0, PAGE_SIZE);
+ unmap_domain_mem(l2);
list_ent = d->page_list.next;
while ( list_ent != &d->page_list )
ASSERT(pfn != INVALID_M2P_ENTRY);
ASSERT(pfn < (1u<<20));
- va = pfn << PAGE_SHIFT;
- if ( !l2_pgentry_val(l2e = l2[l2_table_offset(va)]) )
- {
- l1page = alloc_domheap_page(NULL);
- if ( !l1page )
- return 0;
- l2e = l2[l2_table_offset(va)] =
- mk_l2_pgentry((page_to_pfn(l1page) << PAGE_SHIFT) |
- __PAGE_HYPERVISOR);
- }
+ set_p2m_entry(d, pfn, mfn);
- if ( l2_pgentry_val(last_l2e) != l2_pgentry_val(l2e) )
- {
- if ( l1 )
- unmap_domain_mem(l1);
- l1 = map_domain_mem(l2_pgentry_val(l2e) & PAGE_MASK);
- last_l2e = l2e;
- }
-
- l1[l1_table_offset(va)] = mk_l1_pgentry((mfn << PAGE_SHIFT) |
- __PAGE_HYPERVISOR);
list_ent = page->list.next;
}
- if ( l1 )
- unmap_domain_mem(l1);
- unmap_domain_mem(l2);
-
return 1;
}
{
// external guests provide their own memory for their P2M maps.
//
- unsigned long mfn = pagetable_val(d->arch.phys_table)>>PAGE_SHIFT;
- ASSERT( d == page_get_owner(&frame_table[mfn]) );
+ ASSERT( d == page_get_owner(&frame_table[pagetable_val(
+ d->arch.phys_table)>>PAGE_SHIFT]) );
}
}
shadow_unlock(d);
}
-static unsigned long
+unsigned long
gpfn_to_mfn_safe(struct domain *d, unsigned long gpfn)
{
ASSERT( shadow_mode_translate(d) );
{
printk("gpfn_to_mfn_safe(d->id=%d, gpfn=%p) => 0 l2e=%p\n",
d->id, gpfn, l2_pgentry_val(l2e));
- return 0;
+ return INVALID_MFN;
}
unsigned long l1tab = l2_pgentry_val(l2e) & PAGE_MASK;
l1_pgentry_t *l1 = map_domain_mem(l1tab);
{
printk("gpfn_to_mfn_safe(d->id=%d, gpfn=%p) => 0 l1e=%p\n",
d->id, gpfn, l1_pgentry_val(l1e));
- return 0;
+ return INVALID_MFN;
}
return l1_pgentry_val(l1e) >> PAGE_SHIFT;
else
limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
- if ( unlikely(current->domain != d) && !shadow_mode_external(d) )
+ for ( i = 0; i < limit; i++ )
{
- // Can't use __gpfn_to_mfn() if we don't have one of this domain's
- // page tables currently installed. What a pain in the neck!
- //
- // This isn't common -- it only happens during shadow mode setup
- // and mode changes.
- //
- perfc_incrc(shadow_hl2_other_domain);
- for ( i = 0; i < limit; i++ )
- {
- unsigned long gl2e = l2_pgentry_val(gl2[i]);
- unsigned long mfn;
+ unsigned long gl2e = l2_pgentry_val(gl2[i]);
+ unsigned long hl2e;
- if ( (gl2e & _PAGE_PRESENT) &&
- (mfn = gpfn_to_mfn_safe(d, gl2e >> PAGE_SHIFT)) )
- {
- hl2[i] = mk_l1_pgentry((mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
- get_page(pfn_to_page(mfn), d);
- }
- else
- {
- hl2[i] = mk_l1_pgentry(0);
- }
- }
- }
- else
- {
- for ( i = 0; i < limit; i++ )
- {
- unsigned long gl2e = l2_pgentry_val(gl2[i]);
- unsigned long mfn;
+ hl2e_propagate_from_guest(d, gl2e, &hl2e);
- if ( (gl2e & _PAGE_PRESENT) &&
- (mfn = __gpfn_to_mfn(d, gl2e >> PAGE_SHIFT)) )
- {
- hl2[i] = mk_l1_pgentry((mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
- get_page(pfn_to_page(mfn), d);
- }
- else
- hl2[i] = mk_l1_pgentry(0);
- }
+ if ( (hl2e & _PAGE_PRESENT) &&
+ !get_page(pfn_to_page(hl2e >> PAGE_SHIFT), d) )
+ hl2e = 0;
+
+ hl2[i] = mk_l1_pgentry(hl2e);
}
if ( !shadow_mode_external(d) )
SH_VVLOG("4a: l1 not shadowed");
gl1mfn = __gpfn_to_mfn(d, gl1pfn);
- if ( unlikely(!gl1mfn) )
+ if ( unlikely(!VALID_MFN(gl1mfn)) )
{
// Attempt to use an invalid pfn as an L1 page.
// XXX this needs to be more graceful!
l1mfn = __gpfn_to_mfn(d, l2e >> PAGE_SHIFT);
// If the l1 pfn is invalid, it can't be out of sync...
- if ( !l1mfn )
+ if ( !VALID_MFN(l1mfn) )
return 0;
if ( page_out_of_sync(&frame_table[l1mfn]) &&
struct shadow_status *a;
unsigned long sl1mfn = __shadow_status(d, gpfn, PGT_l1_shadow);
u32 count = 0;
+ u32 write_refs;
ASSERT(spin_is_locked(&d->arch.shadow_lock));
ASSERT(gmfn);
+ perfc_incrc(remove_write_access);
+
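+ // Fast path: a writable page with no outstanding writable references
+ // has no write access left to remove.
+ //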
+ if ( (frame_table[gmfn].u.inuse.type_info & PGT_type_mask) ==
+ PGT_writable_page )
+ {
+ write_refs = (frame_table[gmfn].u.inuse.type_info & PGT_count_mask);
+ if ( write_refs == 0 )
+ {
+ perfc_incrc(remove_write_access_easy);
+ return 0;
+ }
+ }
+
for (i = 0; i < shadow_ht_buckets; i++)
{
a = &d->arch.shadow_ht[i];
// XXX - This hack works for linux guests.
// Need a better solution long term.
if ( !(new_pde & _PAGE_PRESENT) && unlikely(new_pde != 0) &&
- (frame_table[smfn].u.inuse.type_info & PGT_pinned) &&
- !unshadow )
+ !unshadow &&
+ (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
{
perfc_incrc(unshadow_l2_count);
unshadow = 1;
}
break;
default:
- BUG();
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ {
+ if ( !is_guest_l2_slot(i) && !external )
+ continue;
+
+ unsigned long new_pde = guest[i];
+ if ( new_pde != snapshot[i] )
+ {
+ need_flush |= validate_hl2e_change(d, new_pde, &shadow[i]);
+
+ // can't update snapshots of linear page tables -- they
+ // are used multiple times...
+ //
+ // snapshot[i] = new_pde;
+ }
+ }
break;
}
return 0;
}
- l1pte_write_fault(ed, &gpte, &spte, va);
+ if ( !l1pte_write_fault(ed, &gpte, &spte, va) )
+ {
+ SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
+ perfc_incrc(write_fault_bail);
+ shadow_unlock(d);
+ return 0;
+ }
}
else
{
- l1pte_read_fault(d, &gpte, &spte);
+ if ( !l1pte_read_fault(d, &gpte, &spte) )
+ {
+ SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
+ perfc_incrc(read_fault_bail);
+ shadow_unlock(d);
+ return 0;
+ }
}
/*
if ( old_smfn )
put_shadow_ref(old_smfn);
- SH_VVLOG("0: __update_pagetables(gmfn=%p, smfn=%p)", gmfn, smfn);
+ SH_VVLOG("__update_pagetables(gmfn=%p, smfn=%p)", gmfn, smfn);
/*
* arch.shadow_vtable
gpfn = gpte >> PAGE_SHIFT;
gmfn = __gpfn_to_mfn(d, gpfn);
+ if ( !VALID_MFN(gmfn) )
+ FAIL("invalid gpfn=%p gpte=%p\n", __func__, gpfn, gpte);
+
page_table_page = mfn_is_page_table(gmfn);
if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) && !oos_ptes )
unsigned long ptbase_mfn = 0;
int errors = 0, limit, oos_pdes = 0;
- audit_domain(d);
+ _audit_domain(d, AUDIT_QUIET);
shadow_lock(d);
sh_check_name = s;
/*
* The guest CR3 must be pointing to the guest physical.
*/
- if (!(mfn = phys_to_machine_mapping(
- d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)))
+ if (!VALID_MFN(mfn = phys_to_machine_mapping(
+ d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)))
{
VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx",
d->arch.arch_vmx.cpu_cr3);
/* Copy the initial ramdisk. */
if ( initrd_len != 0 )
memcpy((void *)vinitrd_start, initrd_start, initrd_len);
-
+
+ d->next_io_page = d->max_pages;
+
/* Set up start info area. */
si = (start_info_t *)vstartinfo_start;
memset(si, 0, PAGE_SIZE);
si->nr_pages = d->tot_pages;
+#define NASTY_HACK
+#ifdef NASTY_HACK
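+ /*
+ * Expose shared_info at the next free I/O pfn and record the
+ * machine->phys mapping, rather than handing the guest its raw machine
+ * address (presumably for translate-mode guests, which expect
+ * pseudo-physical addresses).
+ */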
+ si->shared_info = d->next_io_page << PAGE_SHIFT;
+ set_machinetophys(virt_to_phys(d->shared_info) >> PAGE_SHIFT,
+ d->next_io_page);
+ d->next_io_page++;
+#else
si->shared_info = virt_to_phys(d->shared_info);
+#endif
si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
si->pt_base = vpt_start;
si->nr_pt_frames = nr_pt_pages;
for ( pfn = 0; pfn < d->tot_pages; pfn++ )
{
mfn = pfn + (alloc_start>>PAGE_SHIFT);
+#if 0
#ifndef NDEBUG
#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
if ( pfn > REVERSE_START )
mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
+#endif
#endif
((u32 *)vphysmap_start)[pfn] = mfn;
machine_to_phys_mapping[mfn] = pfn;
/* Use a spare PTE bit to mark entries ready for recycling. */
#define READY_FOR_TLB_FLUSH (1<<10)
+#ifdef WATCH_MAP_DOMAIN_CALLERS
+int map_domain_mem_noisy = 1;
+#endif
+
+
static void flush_all_ready_maps(void)
{
unsigned long *cache = mapcache;
while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
}
-
-void *map_domain_mem(unsigned long pa)
+void *_map_domain_mem(unsigned long pa)
{
unsigned long va;
unsigned int idx, cpu = smp_processor_id();
ds, es, fs, gs, ss, cs);
show_stack((unsigned long *)®s->esp);
+ if ( GUEST_MODE(regs) )
+ show_guest_stack();
}
void show_page_walk(unsigned long addr)
printk("Scrubbing Free RAM: ");
+#ifdef WATCH_MAP_DOMAIN_CALLERS
+ int old_map_domain_mem_noisy = map_domain_mem_noisy;
+ map_domain_mem_noisy = 0;
+#endif
+
for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
{
/* Every 100MB, print a progress dot and appease the watchdog. */
spin_unlock_irqrestore(&heap_lock, flags);
}
+#ifdef WATCH_MAP_DOMAIN_CALLERS
+ map_domain_mem_noisy = old_map_domain_mem_noisy;
+#endif
+
printk("done.\n");
}
list_add_tail(&(_pfn)->list, &(_dom)->xenpage_list); \
spin_unlock(&(_dom)->page_alloc_lock); \
} while ( 0 )
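+/*
+ * Variant of SHARE_PFN_WITH_DOMAIN that places the xenheap page on the
+ * domain's ordinary page_list (not xenpage_list) and gives it a writable
+ * type reference plus two general references, so it behaves like a normal
+ * writable guest page.
+ */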
+#define SHARE_PFN_WITH_DOMAIN2(_pfn, _dom) \
+ do { \
+ page_set_owner((_pfn), (_dom)); \
+ /* The incremented type count is intended to pin to 'writable'. */ \
+ (_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; \
+ wmb(); /* install valid domain ptr before updating refcnt. */ \
+ spin_lock(&(_dom)->page_alloc_lock); \
+ /* _dom holds an allocation reference + writable ref */ \
+ ASSERT((_pfn)->count_info == 0); \
+ (_pfn)->count_info |= PGC_allocated | 2; \
+ if ( unlikely((_dom)->xenheap_pages++ == 0) ) \
+ get_knownalive_domain(_dom); \
+ list_add_tail(&(_pfn)->list, &(_dom)->page_list); \
+ spin_unlock(&(_dom)->page_alloc_lock); \
+ } while ( 0 )
extern struct pfn_info *frame_table;
extern unsigned long frame_table_size;
unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
unlikely(d != _domain) ) /* Wrong owner? */
{
+ if ( !domain->arch.shadow_mode )
DPRINTK("Error pfn %p: rd=%p(%d), od=%p(%d), caf=%08x, taf=%08x\n",
page_to_pfn(page), domain, (domain ? domain->id : -1),
page_get_owner(page),
return 1;
}
-void put_page_type(struct pfn_info *page);
-int get_page_type(struct pfn_info *page, u32 type);
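+/*
+ * Debug hooks: define MFN1_TO_WATCH to log every get/put_page_type() call
+ * on that one frame (with its current count/type info and the call site)
+ * before invoking the real _get_page_type()/_put_page_type().
+ */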
+//#define MFN1_TO_WATCH 0x1d8
+#ifdef MFN1_TO_WATCH
+#define get_page_type(__p, __t) ( \
+{ \
+ struct pfn_info *_p = (__p); \
+ u32 _t = (__t); \
+ if ( page_to_pfn(_p) == MFN1_TO_WATCH ) \
+ printk("get_page_type(%x) c=%p ot=%p @ %s:%d in %s\n", \
+ MFN1_TO_WATCH, frame_table[MFN1_TO_WATCH].count_info, \
+ frame_table[MFN1_TO_WATCH].u.inuse.type_info, \
+ __FILE__, __LINE__, __func__); \
+ _get_page_type(_p, _t); \
+})
+#define put_page_type(__p) ( \
+{ \
+ struct pfn_info *_p = (__p); \
+ if ( page_to_pfn(_p) == MFN1_TO_WATCH ) \
+ printk("put_page_type(%x) c=%p ot=%p @ %s:%d in %s\n", \
+ MFN1_TO_WATCH, frame_table[MFN1_TO_WATCH].count_info, \
+ frame_table[MFN1_TO_WATCH].u.inuse.type_info, \
+ __FILE__, __LINE__, __func__); \
+ _put_page_type(_p); \
+})
+#else
+#define _get_page_type get_page_type
+#define _put_page_type put_page_type
+#endif
+
+void _put_page_type(struct pfn_info *page);
+int _get_page_type(struct pfn_info *page, u32 type);
int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
* been used by the read-only MPT map.
*/
#define __phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
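+/* An mfn with bit 31 set is treated as invalid; INVALID_MFN (~0UL) qualifies. */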
+#define INVALID_MFN (~0UL)
+#define VALID_MFN(_mfn) (!((_mfn) & (1U<<31)))
/* Returns the machine physical */
static inline unsigned long phys_to_machine_mapping(unsigned long pfn)
unsigned long mfn;
l1_pgentry_t pte;
- if (__get_user(l1_pgentry_val(pte), (__phys_to_machine_mapping + pfn)))
- mfn = 0;
- else
+ if ( !__get_user(l1_pgentry_val(pte), (__phys_to_machine_mapping + pfn)) &&
+ (l1_pgentry_val(pte) & _PAGE_PRESENT) )
mfn = l1_pgentry_to_phys(pte) >> PAGE_SHIFT;
+ else
+ mfn = INVALID_MFN;
return mfn;
}
(PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
#define shadow_lock_init(_d) spin_lock_init(&(_d)->arch.shadow_lock)
-#define shadow_lock(_d) spin_lock(&(_d)->arch.shadow_lock)
+#define shadow_lock(_d) \
+    do { \
+        ASSERT(!spin_is_locked(&(_d)->arch.shadow_lock)); \
+        spin_lock(&(_d)->arch.shadow_lock); \
+    } while (0)
#define shadow_unlock(_d) spin_unlock(&(_d)->arch.shadow_lock)
extern void shadow_mode_init(void);
extern void free_monitor_pagetable(struct exec_domain *ed);
extern void __shadow_sync_all(struct domain *d);
extern int __shadow_out_of_sync(struct exec_domain *ed, unsigned long va);
+extern int set_p2m_entry(
+ struct domain *d, unsigned long pfn, unsigned long mfn);
static inline unsigned long __shadow_status(
struct domain *d, unsigned long gpfn, unsigned long stype);
phys_to_machine_mapping(gpfn); }) \
: (gpfn) )
+extern unsigned long gpfn_to_mfn_safe(
+ struct domain *d, unsigned long gpfn);
+
/************************************************************************/
struct shadow_status {
if ( unlikely(!res) )
{
perfc_incrc(shadow_get_page_fail);
- FSH_LOG("%s failed to get ref l1e=%p\n", l1_pgentry_val(l1e));
+ FSH_LOG("%s failed to get ref l1e=%p\n", __func__, l1_pgentry_val(l1e));
}
return res;
unsigned long old_hl2e =
l1_pgentry_val(ed->arch.hl2_vtable[l2_table_offset(va)]);
unsigned long new_hl2e =
- (mfn ? ((mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR) : 0);
+ (VALID_MFN(mfn) ? ((mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR) : 0);
// only do the ref counting if something important changed.
//
/************************************************************************/
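+/*
+ * Debug hooks: define MFN3_TO_WATCH to log every get/put_shadow_ref() call
+ * on that shadow frame (with its current count and the call site) before
+ * invoking the real _get_shadow_ref()/_put_shadow_ref().
+ */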
+//#define MFN3_TO_WATCH 0x1ff6e
+#ifdef MFN3_TO_WATCH
+#define get_shadow_ref(__s) ( \
+{ \
+ unsigned long _s = (__s); \
+ if ( _s == MFN3_TO_WATCH ) \
+ printk("get_shadow_ref(%x) oc=%d @ %s:%d in %s\n", \
+ MFN3_TO_WATCH, frame_table[_s].count_info, \
+ __FILE__, __LINE__, __func__); \
+ _get_shadow_ref(_s); \
+})
+#define put_shadow_ref(__s) ( \
+{ \
+ unsigned long _s = (__s); \
+ if ( _s == MFN3_TO_WATCH ) \
+ printk("put_shadow_ref(%x) oc=%d @ %s:%d in %s\n", \
+ MFN3_TO_WATCH, frame_table[_s].count_info, \
+ __FILE__, __LINE__, __func__); \
+ _put_shadow_ref(_s); \
+})
+#else
+#define _get_shadow_ref get_shadow_ref
+#define _put_shadow_ref put_shadow_ref
+#endif
+
/*
* Add another shadow reference to smfn.
*/
static inline int
-get_shadow_ref(unsigned long smfn)
+_get_shadow_ref(unsigned long smfn)
{
u32 x, nx;
* Drop a shadow reference to smfn.
*/
static inline void
-put_shadow_ref(unsigned long smfn)
+_put_shadow_ref(unsigned long smfn)
{
u32 x, nx;
ASSERT(spin_is_locked(&d->arch.shadow_lock));
ASSERT(d->arch.shadow_dirty_bitmap != NULL);
+ if ( !VALID_MFN(mfn) )
+ return rc;
+
pfn = __mfn_to_gpfn(d, mfn);
/*
struct exec_domain *ed, unsigned long gpfn, unsigned long mfn,
unsigned long va);
-static inline void l1pte_write_fault(
+static inline int l1pte_write_fault(
struct exec_domain *ed, unsigned long *gpte_p, unsigned long *spte_p,
unsigned long va)
{
unsigned long gpte = *gpte_p;
unsigned long spte;
unsigned long gpfn = gpte >> PAGE_SHIFT;
- unsigned long mfn = __gpfn_to_mfn(d, gpfn);
+ unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
- //printk("l1pte_write_fault gmfn=%p\n", mfn);
+ //printk("l1pte_write_fault gmfn=%p\n", gmfn);
- if ( unlikely(!mfn) )
+ if ( unlikely(!VALID_MFN(gmfn)) )
{
SH_LOG("l1pte_write_fault: invalid gpfn=%p", gpfn);
*spte_p = 0;
- return;
+ return 0;
}
ASSERT(gpte & _PAGE_RW);
gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
- spte = (mfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
+ spte = (gmfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
SH_VVLOG("l1pte_write_fault: updating spte=0x%p gpte=0x%p", spte, gpte);
if ( shadow_mode_log_dirty(d) )
- __mark_dirty(d, mfn);
+ __mark_dirty(d, gmfn);
- if ( mfn_is_page_table(mfn) )
- shadow_mark_va_out_of_sync(ed, gpfn, mfn, va);
+ if ( mfn_is_page_table(gmfn) )
+ shadow_mark_va_out_of_sync(ed, gpfn, gmfn, va);
*gpte_p = gpte;
*spte_p = spte;
+
+ return 1;
}
-static inline void l1pte_read_fault(
+static inline int l1pte_read_fault(
struct domain *d, unsigned long *gpte_p, unsigned long *spte_p)
{
unsigned long gpte = *gpte_p;
unsigned long pfn = gpte >> PAGE_SHIFT;
unsigned long mfn = __gpfn_to_mfn(d, pfn);
- if ( unlikely(!mfn) )
+ if ( unlikely(!VALID_MFN(mfn)) )
{
SH_LOG("l1pte_read_fault: invalid gpfn=%p", pfn);
*spte_p = 0;
- return;
+ return 0;
}
gpte |= _PAGE_ACCESSED;
SH_VVLOG("l1pte_read_fault: updating spte=0x%p gpte=0x%p", spte, gpte);
*gpte_p = gpte;
*spte_p = spte;
+
+ return 1;
}
static inline void l1pte_propagate_from_guest(
struct domain *d, unsigned long gpte, unsigned long *spte_p)
{
unsigned long pfn = gpte >> PAGE_SHIFT;
- unsigned long mfn = __gpfn_to_mfn(d, pfn);
- unsigned long spte;
+ unsigned long mfn, spte;
spte = 0;
- if ( mfn &&
- ((gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
- (_PAGE_PRESENT|_PAGE_ACCESSED)) ) {
-
+ if ( ((gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
+ (_PAGE_PRESENT|_PAGE_ACCESSED)) &&
+ VALID_MFN(mfn = __gpfn_to_mfn(d, pfn)) )
+ {
spte = (mfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
if ( shadow_mode_log_dirty(d) ||
spte &= ~_PAGE_RW;
}
}
-#if 0
+#if 0
if ( spte || gpte )
- SH_VLOG("%s: gpte=0x%p, new spte=0x%p", __func__, gpte, spte);
-
+ SH_VVLOG("%s: gpte=%p, new spte=%p", __func__, gpte, spte);
#endif
+
*spte_p = spte;
}
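+/*
+ * Build the hl2 entry (an l1e with __PAGE_HYPERVISOR flags mapping the
+ * machine frame behind a guest "physical" frame) for a given guest L2 PDE.
+ * Falls back to gpfn_to_mfn_safe() when this domain's page tables are not
+ * the ones currently installed, since __gpfn_to_mfn() relies on them.
+ */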
+static inline void hl2e_propagate_from_guest(
+ struct domain *d, unsigned long gpde, unsigned long *hl2e_p)
+{
+ unsigned long pfn = gpde >> PAGE_SHIFT;
+ unsigned long mfn, hl2e;
+
+ hl2e = 0;
+
+ if ( gpde & _PAGE_PRESENT )
+ {
+ if ( unlikely((current->domain != d) && !shadow_mode_external(d)) )
+ {
+ // Can't use __gpfn_to_mfn() if we don't have one of this domain's
+ // page tables currently installed. What a pain in the neck!
+ //
+ // This isn't common -- it only happens during shadow mode setup
+ // and mode changes.
+ //
+ mfn = gpfn_to_mfn_safe(d, pfn);
+ }
+ else
+ mfn = __gpfn_to_mfn(d, pfn);
+
+ if ( VALID_MFN(mfn) )
+ hl2e = (mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR;
+ }
+
+ if ( hl2e || gpde )
+ SH_VVLOG("%s: gpde=%p hl2e=%p", __func__, gpde, hl2e);
+
+ *hl2e_p = hl2e;
+}
+
static inline void l2pde_general(
struct domain *d,
unsigned long *gpde_p,
}
if ( spde || gpde )
- SH_VLOG("%s: gpde=0x%p, new spde=0x%p", __func__, gpde, spde);
+ SH_VVLOG("%s: gpde=%p, new spde=%p", __func__, gpde, spde);
*spde_p = spde;
}
return 1;
}
+// returns true if a tlb flush is needed
+//
+static int inline
+validate_hl2e_change(
+ struct domain *d,
+ unsigned long new_gpde,
+ unsigned long *shadow_hl2e_p)
+{
+ unsigned long old_hl2e, new_hl2e;
+
+ perfc_incrc(validate_hl2e_calls);
+
+ old_hl2e = *shadow_hl2e_p;
+ hl2e_propagate_from_guest(d, new_gpde, &new_hl2e);
+
+ // Only do the ref counting if something important changed.
+ //
+ if ( ((old_hl2e | new_hl2e) & _PAGE_PRESENT) &&
+ ((old_hl2e ^ new_hl2e) & (PAGE_MASK | _PAGE_PRESENT)) )
+ {
+ perfc_incrc(validate_hl2e_changes);
+
+ if ( (new_hl2e & _PAGE_PRESENT) &&
+ !get_page(pfn_to_page(new_hl2e >> PAGE_SHIFT), d) )
+ new_hl2e = 0;
+ if ( old_hl2e & _PAGE_PRESENT )
+ put_page(pfn_to_page(old_hl2e >> PAGE_SHIFT));
+ }
+
+ *shadow_hl2e_p = new_hl2e;
+
+ // paranoia rules!
+ return 1;
+
+}
+
// returns true if a tlb flush is needed
//
static int inline
{
unsigned long gmfn = ((current->domain == d)
? __gpfn_to_mfn(d, gpfn)
- : 0);
+ : INVALID_MFN);
ASSERT(spin_is_locked(&d->arch.shadow_lock));
ASSERT(gpfn == (gpfn & PGT_mfn_mask));
ASSERT(stype && !(stype & ~PGT_type_mask));
- if ( gmfn && ((stype != PGT_snapshot)
- ? !mfn_is_page_table(gmfn)
- : !mfn_out_of_sync(gmfn)) )
+ if ( VALID_MFN(gmfn) &&
+ ((stype != PGT_snapshot)
+ ? !mfn_is_page_table(gmfn)
+ : !mfn_out_of_sync(gmfn)) )
{
perfc_incrc(shadow_status_shortcut);
ASSERT(___shadow_status(d, gpfn, stype) == 0);
static inline void delete_shadow_status(
- struct domain *d, unsigned int gpfn, unsigned int stype)
+ struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype)
{
struct shadow_status *p, *x, *n, *head;
unsigned long key = gpfn | stype;
found:
// release ref to page
- put_page(pfn_to_page(__gpfn_to_mfn(d, gpfn)));
+ put_page(pfn_to_page(gmfn));
shadow_audit(d, 0);
}
SH_VVLOG("set gpfn=%p gmfn=%p smfn=%p t=%p\n", gpfn, gmfn, smfn, stype);
ASSERT(spin_is_locked(&d->arch.shadow_lock));
- ASSERT(gpfn && !(gpfn & ~PGT_mfn_mask));
+
+ ASSERT(shadow_mode_translate(d) || gpfn);
+ ASSERT(!(gpfn & ~PGT_mfn_mask));
+
ASSERT(pfn_is_ram(gmfn)); // XXX need to be more graceful
ASSERT(smfn && !(smfn & ~PGT_mfn_mask));
ASSERT(stype && !(stype & ~PGT_type_mask));
extern void handle_mmio(unsigned long, unsigned long);
extern int vmx_setup_platform(struct exec_domain *, execution_context_t *);
-#define mmio_space(gpa) (!phys_to_machine_mapping((gpa) >> PAGE_SHIFT))
+// XXX - think about this -- maybe use bit 30 of the mfn to signify an MMIO frame.
+#define mmio_space(gpa) (!VALID_MFN(phys_to_machine_mapping((gpa) >> PAGE_SHIFT)))
#endif
extern unsigned long *mapcache;
#define MAPCACHE_ENTRIES 1024
+
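+/*
+ * Debug hook: define WATCH_MAP_DOMAIN_CALLERS to log each map_domain_mem()
+ * call site; map_domain_mem_noisy lets hot paths (e.g. the free-RAM
+ * scrubber) temporarily silence the logging.
+ */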
+//#define WATCH_MAP_DOMAIN_CALLERS 1
+#ifdef WATCH_MAP_DOMAIN_CALLERS
+extern int map_domain_mem_noisy;
+#define map_domain_mem(__mdm_pa) ( \
+{ \
+ unsigned long _mdm_pa = (__mdm_pa); \
+ if ( map_domain_mem_noisy ) \
+ printk("map_domain_mem(%p) @ %s:%d in %s\n", \
+ _mdm_pa, __FILE__, __LINE__, __func__); \
+ _map_domain_mem(_mdm_pa); \
+})
+#else
+#define _map_domain_mem map_domain_mem
+#endif
+
/*
* Maps a given physical address, returning corresponding virtual address.
* The entire page containing that VA is now accessible until a
* corresponding call to unmap_domain_mem().
*/
-extern void *map_domain_mem(unsigned long pa);
+extern void *_map_domain_mem(unsigned long pa);
/*
* Pass a VA within a page previously mapped with map_domain_mem().
PERFCOUNTER_CPU(validate_pte_changes, "validate_pte makes changes")
PERFCOUNTER_CPU(validate_pde_calls, "calls to validate_pde_change")
PERFCOUNTER_CPU(validate_pde_changes, "validate_pde makes changes")
-PERFCOUNTER_CPU(shadow_hl2_other_domain, "shadow_hl2 from other domain")
+PERFCOUNTER_CPU(validate_hl2e_calls, "calls to validate_hl2e_change")
+PERFCOUNTER_CPU(validate_hl2e_changes, "validate_hl2e makes changes")
PERFCOUNTER_CPU(gpfn_to_mfn_safe, "calls to gpfn_to_mfn_safe")
+PERFCOUNTER_CPU(write_fault_bail, "sf bailed due to write_fault")
+PERFCOUNTER_CPU(read_fault_bail, "sf bailed due to read_fault")
+PERFCOUNTER_CPU(exception_fixed, "pre-exception fixed")
+PERFCOUNTER_CPU(remove_write_access, "calls to remove_write_access")
+PERFCOUNTER_CPU(remove_write_access_easy, "easy outs of remove_write_access")
struct list_head xenpage_list; /* linked list, of size xenheap_pages */
unsigned int tot_pages; /* number of pages currently possessed */
unsigned int max_pages; /* maximum value for tot_pages */
+ unsigned int next_io_page; /* next io pfn to give to domain */
unsigned int xenheap_pages; /* # pages allocated from Xen heap */
/* Scheduling. */